library(tidyverse)
library(magrittr)
library(lubridate)
library(scales)
library(matrixStats)
library(ggrepel)
library(broom)
library(glue)
library(jsonlite)
library(rvest)
library(RCurl)
library(pander)
library(plotly)
library(cowplot)
library(QuantTools)
library(ggfortify)
library(readxl)
# Global table formatting for pander output: thousands separator, no table
# splitting, rmarkdown-style tables and blank cells for missing values.
# Option names and values are paired positionally.
invisible(
  Map(
    panderOptions,
    c("big.mark", "table.split.table", "table.style", "missing"),
    list(",", Inf, "rmarkdown", "")
  )
)
# Black-and-white theme for every ggplot drawn below
theme_set(theme_bw())
#' Scale the position of a plotly axis annotation and the left margin
#'
#' @param x An object holding a plotly-style layout at `x$x$layout`.
#' @param k Multiplier applied to the x position of the second annotation
#'   and to the left margin (`margin$l`).
#' @return `x` with the two layout values multiplied by `k`.
shiftAxisLabel <- function(x, k = 2) {
  lay <- x$x$layout
  lay$annotations[[2]]$x <- lay$annotations[[2]]$x * k
  lay$margin$l <- lay$margin$l * k
  x$x$layout <- lay
  x
}
# Reporting date: runs started before 11:00 (local clock hour) are attributed
# to the previous day, otherwise today's date is used.
dt <- if (as.numeric(format(Sys.time(), "%H")) < 11) {
  Sys.Date() - 1
} else {
  Sys.Date()
}
# Lookup from state/territory abbreviation to full name
auStates <- setNames(
  c(
    "Australian Capital Territory",
    "Queensland",
    "New South Wales",
    "Victoria",
    "South Australia",
    "Western Australia",
    "Northern Territory",
    "Tasmania"
  ),
  c("ACT", "QLD", "NSW", "VIC", "SA", "WA", "NT", "TAS")
)
# Cumulative confirmed cases from the JHU CSSE global time series, reshaped to
# one row per Province/State and date and restricted to Australia.
confirmed <- url("https://raw.githubusercontent.com/CSSEGISandData/COVID-19/master/csse_covid_19_data/csse_covid_19_time_series/time_series_covid19_confirmed_global.csv") %>%
  read_csv() %>%
  pivot_longer(
    # Date columns are named m/d/yy. Matching the whole pattern (instead of
    # ends_with("20")) also reshapes columns for years after 2020, which would
    # otherwise be left behind as wide columns.
    cols = matches("^[0-9]{1,2}/[0-9]{1,2}/[0-9]{2}$"),
    names_to = "date",
    values_to = "confirmed"
  ) %>%
  mutate(
    # m/d/yy -> 20yy-m-d, then parse as a Date
    date = str_replace_all(
      date, "(.+)/(.+)/(.+)", "20\\3-\\1-\\2"
    ) %>%
      ymd()
  ) %>%
  dplyr::rename(
    Country = `Country/Region`
  ) %>%
  dplyr::filter(
    !is.na(confirmed),
    Country == "Australia"
  ) %>%
  dplyr::select(-Lat, -Long)
# The Guardian's Australian data feed (JSON export of a spreadsheet)
guardData <- fromJSON("https://interactive.guim.co.uk/docsdata/1q5gdePANXci8enuiS4oHUJxcxC13d6bjMRSicakychE.json")
# Reshape the `updates` sheet into the same columns as the JHU data:
# Province/State, Country, date and cumulative confirmed count.
altAU <- as_tibble(guardData$sheets$updates) %>%
  transmute(
    # State abbreviations are expanded to full names
    `Province/State` = auStates[State],
    Country = "Australia",
    # Dates arrive as dd/mm/YYYY text
    date = ymd(parse_date_time(Date, orders = "%d/%m/%Y")),
    confirmed = as.numeric(`Cumulative case count`)
  ) %>%
  arrange(`Province/State`, date) %>%
  # Make every state/date combination explicit, then carry the last known
  # count forward within each state
  tidyr::complete(`Province/State`, Country, date) %>%
  group_by(`Province/State`) %>%
  fill(confirmed) %>%
  # Dates before a state's first report have nothing to carry forward
  dplyr::filter(!is.na(confirmed)) %>%
  ungroup()
# Combine both sources. Sorting on `source` places "API" (Guardian) rows ahead
# of "JHU" rows, so distinct() keeps the Guardian value whenever both sources
# report the same state and date.
confirmed <- confirmed %>%
  mutate(source = "JHU") %>%
  bind_rows(mutate(altAU, source = "API")) %>%
  arrange(date, `Province/State`, source) %>%
  distinct(`Province/State`, Country, date, .keep_all = TRUE) %>%
  dplyr::select(-source)
# Container for the most recent figures; one single-row tibble per state is
# added below and the list is later combined with bind_rows().
latestAU <- list()
# Placeholders; both are overwritten from the scraped table further down.
nswRecov <- nswTests <- NA_real_
# Locate the page to scrape: take the first <item> of the NSW Health news RSS
# feed, split its XML text into lines and extract the https link.
# NOTE(review): presumably the first item is always the latest COVID-19
# statistics release - confirm against the feed.
nswUrl <- "https://www.health.nsw.gov.au/_layouts/feed.aspx?xsl=1&web=/news&page=4ac47e14-04a9-4016-b501-65a23280e841&wp=baabf81e-a904-44f1-8d59-5f6d56519965&pageurl=/news/Pages/rss-nsw-health.aspx" %>%
read_xml() %>%
xml_find_all("//item") %>%
.[[1]] %>%
as.character() %>%
str_split("\n") %>%
.[[1]] %>%
str_subset("https") %>%
str_extract("https[^<]+")
# Parse the first table on that page into two columns (Cases, Count).
nswTable <- nswUrl %>%
read_html() %>%
html_nodes("body") %>%
xml_find_all("//table") %>%
.[[1]] %>%
html_table() %>%
# set_colnames(
# str_replace_all(colnames(.), "\\u200b", "")
# ) %>%
set_colnames(
c("Cases", "Count")
) %>%
# Strip zero-width spaces (U+200B) from every cell
mutate_all(str_replace_all, pattern = "\\u200b", replacement = "") %>%
mutate(
# Drop asterisks and thousands separators before converting to numeric
Count = str_remove_all(Count, "[\\*,]") %>% as.numeric,
# Collapse runs of spaces so the str_detect() lookups below match
Cases = str_replace_all(Cases, " +", " ")
)
# Each value is looked up by matching text in the Cases column.
nswTests <- nswTable %>%
dplyr::filter(
str_detect(Cases, "Total tests carried out")
) %>%
.[["Count"]]
nswRecov <- nswTable %>%
dplyr::filter(str_detect(Cases, "recovered")) %>%
.[["Count"]]
# Latest NSW figures, stamped with the reporting date `dt`
latestAU$NSW <- tibble(
State = "New South Wales",
date = dt,
confirmed = nswTable %>%
dplyr::filter(str_detect(Cases, "Confirmed cases")) %>%
.[["Count"]],
deaths = nswTable %>%
dplyr::filter(str_detect(Cases, "Deaths")) %>%
.[["Count"]],
recovered = nswRecov,
tested = nswTests
)
# ---- Shared helpers for the covidlive.com.au state pages ----
# QLD, VIC, WA, SA and ACT are all scraped from pages with the same layout,
# so the common steps live in three small helpers instead of being repeated.

# Read a state page and return its <body> node set.
covidLiveBody <- function(pageUrl) {
  pageUrl %>%
    read_html() %>%
    html_nodes("body")
}

# Return the first DAILY-SUMMARY table with its TOTAL column made numeric
# (thousands separators removed).
covidLiveSummary <- function(body) {
  body %>%
    xml_find_all("//table[contains(@class, 'DAILY-SUMMARY')]") %>%
    html_table() %>%
    .[[1]] %>%
    mutate(
      TOTAL = str_remove_all(TOTAL, ",") %>% as.numeric
    )
}

# Build the single-row "latest figures" tibble for one state.
#   state        full state name stored in the State column
#   date         reporting date stored in the date column
#   summaryTable result of covidLiveSummary()
#   body         result of covidLiveBody(); used for the DAILY-TESTS table
covidLiveLatest <- function(state, date, summaryTable, body) {
  # TOTAL value(s) of the summary row(s) whose COUNT equals `countLabel`
  summaryTotal <- function(countLabel) {
    summaryTable %>%
      dplyr::filter(COUNT == countLabel) %>%
      pull(TOTAL)
  }
  tibble(
    State = state,
    date = date,
    confirmed = summaryTotal("Cases"),
    deaths = summaryTotal("Deaths"),
    recovered = summaryTotal("Recoveries"),
    # Largest value in the TESTS column of the first DAILY-TESTS table
    tested = body %>%
      xml_find_all("//table[contains(@class, 'DAILY-TESTS')]") %>%
      html_table() %>%
      .[[1]] %>%
      mutate(TESTS = str_remove_all(TESTS, ",") %>% as.numeric) %>%
      pull(TESTS) %>%
      max()
  )
}

# ---- Queensland ----
qldUrl <- "https://covidlive.com.au/qld"
qldBody <- covidLiveBody(qldUrl)
qldTable <- covidLiveSummary(qldBody)
latestAU$QLD <- covidLiveLatest("Queensland", dt, qldTable, qldBody)

# ---- Victoria ----
vicUrl <- "https://covidlive.com.au/vic"
vicBody <- covidLiveBody(vicUrl)
vicTable <- covidLiveSummary(vicBody)
latestAU$VIC <- covidLiveLatest("Victoria", dt, vicTable, vicBody)

# ---- Western Australia ----
waUrl <- "https://covidlive.com.au/wa"
waBody <- covidLiveBody(waUrl)
waTable <- covidLiveSummary(waBody)
latestAU$WA <- covidLiveLatest("Western Australia", dt, waTable, waBody)

# ---- South Australia ----
saUrl <- "https://covidlive.com.au/sa"
saBody <- covidLiveBody(saUrl)
saTable <- covidLiveSummary(saBody)
latestAU$SA <- covidLiveLatest("South Australia", dt, saTable, saBody)

# ---- Australian Capital Territory (act.gov.au) ----
# NOTE(review): latestAU$ACT built from this scrape is overwritten by the
# covidlive.com.au values immediately below, as are actUrl and actTable.
# The scrape is retained so actTxt, actConf, actTests and actRecov stay
# defined; consider removing it if nothing else relies on them.
actRecov <- actTests <- NA_real_
actUrl <- "https://www.covid19.act.gov.au/home"
# Text of the tabular card, one trimmed non-empty entry per line, with commas
# and asterisks removed
actTxt <- actUrl %>%
  read_html() %>%
  html_nodes("body") %>%
  xml_find_all(
    "//div[contains(@class, 'spf-article-card--tabular')]"
  ) %>%
  html_text() %>%
  str_split("\r\n") %>%
  .[[1]] %>%
  str_trim() %>%
  setdiff(y = "") %>%
  str_remove_all("[,*]")
# From the sixth entry onwards the text is read as Category/Total pairs
actTable <- actTxt[seq(6, length(actTxt))] %>%
  matrix(ncol = 2, byrow = TRUE) %>%
  set_colnames(c("Category", "Total")) %>%
  as_tibble() %>%
  mutate(Total = as.numeric(Total))
# Confirmed count: the entry containing "Total" with letters and spaces removed
actConf <- actTxt %>%
  str_subset("Total") %>%
  str_remove_all("[A-Za-z ]") %>%
  as.numeric()
# Tests = negative tests + confirmed cases
actTests <- actTable %>%
  dplyr::filter(str_detect(Category, "Negative test")) %>%
  .[["Total"]] %>%
  sum() %>%
  add(actConf)
actRecov <- actTable %>%
  dplyr::filter(str_detect(Category, "recov")) %>%
  .[["Total"]]
latestAU$ACT <- tibble(
  State = "Australian Capital Territory",
  date = dt,
  confirmed = actConf,
  deaths = 3, # Only able to be added manually.
  recovered = actRecov,
  tested = actTests
)

# ---- Australian Capital Territory (covidlive.com.au) ----
# These values replace the act.gov.au figures assigned above.
actUrl <- "https://covidlive.com.au/act"
actBody <- covidLiveBody(actUrl)
actTable <- covidLiveSummary(actBody)
latestAU$ACT <- covidLiveLatest(
  "Australian Capital Territory", dt, actTable, actBody
)
# Tasmania: the first two tables on the government page are used, with the
# Number column of each reduced to its digits and converted to numeric.
tasUrl <- "https://www.coronavirus.tas.gov.au/facts/cases-and-testing-updates"
tasTables <- tasUrl %>%
  read_html() %>%
  html_nodes("body") %>%
  xml_find_all("//table") %>%
  html_table() %>%
  .[1:2] %>%
  lapply(function(tbl) {
    mutate(tbl, Number = as.numeric(str_remove_all(Number, "[^0-9]")))
  })
# Table 2 supplies the case figures, table 1 the laboratory test total.
latestAU$TAS <- tibble(
  State = "Tasmania",
  date = dt,
  confirmed = tasTables[[2]] %>%
    dplyr::filter(str_detect(`Cases in Tasmania`, "Total cases")) %>%
    pull(Number),
  deaths = tasTables[[2]] %>%
    dplyr::filter(str_detect(`Cases in Tasmania`, "Deaths")) %>%
    pull(Number),
  recovered = tasTables[[2]] %>%
    dplyr::filter(`Cases in Tasmania` == "Recovered") %>%
    pull(Number),
  tested = tasTables[[1]] %>%
    dplyr::filter(`Laboratory tests` == "Total laboratory tests") %>%
    pull(Number)
)
# Northern Territory: figures are read from the text of the statistics cards
# on the government status page rather than from an HTML table.
ntUrl <- "https://coronavirus.nt.gov.au/current-status"
ntTable <- ntUrl %>%
read_html() %>%
html_nodes("body") %>%
xml_find_all("//div[contains(@class, 'covid-card-stats')]") %>%
html_text() %>%
# Split the card text into trimmed, non-empty lines
str_split("\r\n") %>%
.[[1]] %>%
str_trim() %>%
.[. != ""] %>%
# Lines are read pairwise as (Total, Category)
matrix(ncol = 2, byrow = TRUE) %>%
set_colnames(c("Total", "Category")) %>%
as.data.frame() %>%
mutate(Total = str_remove_all(Total, ",") %>% as.numeric())
# Values are picked by matching words in the Category text.
latestAU$NT <- tibble(
State = "Northern Territory",
date = dt,
confirmed = dplyr::filter(ntTable, str_detect(Category, "confirmed"))$Total,
# Hard-coded; deaths are not read from the page
deaths = 0,
recovered = dplyr::filter(ntTable, str_detect(Category, "recovered"))$Total,
tested = dplyr::filter(ntTable, str_detect(Category, "tests"))$Total
)
# Collapse the per-state list into a single tibble
latestAU <- latestAU %>%
  bind_rows() %>%
  mutate(Country = "Australia")
# Save today's recovered counts as recovered/recovered_<date>.tsv
write_tsv(
  dplyr::select(
    latestAU,
    `Province/State` = State, Country, date, recovered
  ),
  here::here(glue("recovered/recovered_{dt}.tsv"))
)
# Append the freshly scraped state totals to the confirmed time series.
confirmed <- confirmed %>%
  bind_rows(
    dplyr::select(latestAU, any_of(colnames(.)), `Province/State` = State)
  ) %>%
  arrange(date) %>%
  # Within each state the cumulative count is never allowed to decrease
  group_by(`Province/State`, Country) %>%
  mutate(confirmed = cummax(confirmed)) %>%
  ungroup() %>%
  # Where a state has several rows for one date, keep the largest count
  arrange(Country, `Province/State`, date, desc(confirmed)) %>%
  dplyr::distinct(`Province/State`, Country, date, .keep_all = TRUE)
# NSW figures from the postcode heatmap data file.
# This is 24hrs behind but has the censored values
# We can update the recovered time series from here
# These deaths are more accurate than JHU and the JHU TS should be updated
nswJSON <- "https://nswdac-covid-19-postcode-heatmap.azurewebsites.net/datafiles/data_Cases2.json" %>%
fromJSON() %>%
.[["data"]] %>%
as_tibble() %>%
# Date is split into Day and Month only; the year is not part of the field
separate(Date, into = c("Day", "Month")) %>%
mutate(
# The year is hard-coded as 2020 and Month is parsed with %b
# (abbreviated month name).
# NOTE(review): dates from any other year would be mis-assigned - confirm
# whether the feed ever spans a year boundary.
Date = paste0("2020-", Month, "-", Day) %>%
as_datetime(format = "%Y-%b-%d") %>%
ymd()
) %>%
# Collapse all rows to one total per date
group_by(Date) %>%
summarise(
confirmed = sum(Cases),
# Recovered counts include the censored values
recovered = sum(Recovered + censored),
deaths = sum(Deaths)
) %>%
# Add the identifying columns used by the other time series
mutate(
`Province/State` = "New South Wales",
Country = "Australia"
) %>%
dplyr::rename(date = Date)
# Recovered time series for Australian states, rebuilt from the daily
# recovered_<date>.tsv snapshots saved in the recovered/ directory.
auRecTS <- list.files(
  here::here("recovered"),
  pattern = "rec.*tsv",
  full.names = TRUE
) %>%
  lapply(read_tsv) %>%
  bind_rows() %>%
  # For dates covered by the NSW JSON feed, its values replace the snapshots
  dplyr::filter(
    !(date %in% nswJSON$date & `Province/State` == "New South Wales")
  ) %>%
  bind_rows(dplyr::select(nswJSON, any_of(colnames(.)))) %>%
  arrange(`Province/State`, date) %>%
  # Cumulative recoveries may never decrease within a state
  group_by(`Province/State`) %>%
  mutate(recovered = cummax(recovered)) %>%
  # Drop the grouping so later verbs on auRecTS do not silently run per state
  ungroup()
# Cumulative recovered cases: the JHU CSSE global time series for Australia
# combined with the locally collected state series (auRecTS).
recovered <- url("https://raw.githubusercontent.com/CSSEGISandData/COVID-19/master/csse_covid_19_data/csse_covid_19_time_series/time_series_covid19_recovered_global.csv") %>%
  read_csv() %>%
  pivot_longer(
    # Date columns are named m/d/yy. Matching the whole pattern (instead of
    # ends_with("20")) also reshapes columns for years after 2020, which would
    # otherwise be left behind as wide columns.
    cols = matches("^[0-9]{1,2}/[0-9]{1,2}/[0-9]{2}$"),
    names_to = "date",
    values_to = "recovered"
  ) %>%
  mutate(
    # m/d/yy -> 20yy-m-d, then parse as a Date
    date = str_replace_all(
      date, "(.+)/(.+)/(.+)", "20\\3-\\1-\\2"
    ) %>%
      ymd()
  ) %>%
  dplyr::rename(
    Country = `Country/Region`
  ) %>%
  dplyr::select(-Lat, -Long) %>%
  dplyr::filter(Country == "Australia") %>%
  bind_rows(auRecTS) %>%
  # Where both sources report a state/date, keep the larger value
  group_by(
    `Province/State`, Country, date
  ) %>%
  summarise_at(vars(recovered), max)
# Cumulative deaths: the JHU CSSE global time series for Australia, extended
# with the latest scraped state totals and the NSW JSON feed.
deaths <- url("https://raw.githubusercontent.com/CSSEGISandData/COVID-19/master/csse_covid_19_data/csse_covid_19_time_series/time_series_covid19_deaths_global.csv") %>%
  read_csv() %>%
  pivot_longer(
    # Date columns are named m/d/yy. Matching the whole pattern (instead of
    # ends_with("20")) also reshapes columns for years after 2020, which would
    # otherwise be left behind as wide columns.
    cols = matches("^[0-9]{1,2}/[0-9]{1,2}/[0-9]{2}$"),
    names_to = "date",
    values_to = "deaths"
  ) %>%
  mutate(
    # m/d/yy -> 20yy-m-d, then parse as a Date
    date = str_replace_all(
      date, "(.+)/(.+)/(.+)", "20\\3-\\1-\\2"
    ) %>%
      ymd()
  ) %>%
  dplyr::rename(
    Country = `Country/Region`
  ) %>%
  dplyr::filter(Country == "Australia") %>%
  dplyr::select(-Lat, -Long) %>%
  bind_rows(
    dplyr::select(latestAU, any_of(colnames(.)), `Province/State` = State)
  ) %>%
  bind_rows(
    dplyr::select(nswJSON, any_of(colnames(.)))
  ) %>%
  arrange(date) %>%
  # Within each state the cumulative count is never allowed to decrease
  mutate(f = paste(`Province/State`, Country, sep = "_")) %>%
  split(f = .$f) %>%
  lapply(mutate, deaths = cummax(deaths)) %>%
  bind_rows() %>%
  dplyr::select(-f) %>%
  # Several sources can report the same state/date. Sort the largest value
  # first so distinct() keeps it, as is done for the confirmed series;
  # previously the first (smallest) row for each date was retained.
  arrange(Country, `Province/State`, date, desc(deaths)) %>%
  dplyr::distinct(`Province/State`, Country, date, .keep_all = TRUE)
Data for confirmed cases, recoveries and fatalities was primarily sourced from Johns Hopkins University, using the datasets provided at https://github.com/CSSEGISandData/COVID-19. JHU data is now updated every 24 hours at approximately 3:30 (UTC), which is about 1:00PM in Adelaide. As such, no accurate daily updates for international data can be produced until after that time. Importantly, dates associated with confirmed cases from this data source may differ from dates associated with confirmed cases from Australian sources. For example, cases reported in the morning in Australia may be assigned to the previous day in US data sources.
Live hourly updates for Australia are available from https://covid-19-au.com/ for those who would like an up to the minute breakdown of confirmed cases. Numbers used for generation of this page are updated periodically throughout the day using values provided by individual states at NSW, QLD, VIC, WA, SA, TAS, ACT and the NT. Additional Australian data was obtained from The Guardian. Whilst dates from the Guardian are likely to be reported using the UK time-zone as a reference, this is still closer to Australian time zones than USA-based data from JHU.
Information on recovered cases has been difficult to obtain accurately due to inconsistent methods for considering a case as recovered, and a lack of reporting for these cases in many jurisdictions. In Australia, some states (e.g. NSW & QLD) only began releasing these figures in mid-April, whilst other states such as Victoria were releasing these numbers from mid-March.
International data and figures can be viewed here
# Resident population of each state/territory, used for per-capita rates
ausPops <- tibble(
  State = c(
    "New South Wales",
    "Victoria",
    "Queensland",
    "South Australia",
    "Western Australia",
    "Tasmania",
    "Northern Territory",
    "Australian Capital Territory"
  ),
  Population = c(
    8117976,
    6629870,
    5115451,
    1756494,
    2630557,
    535500,
    245562,
    428060
  )
)
Australian State populations were taken from the ABS Website and were accurate in Sept 2019.
All state-level numbers were obtained from the following sources: NSW, QLD, VIC, WA, SA, TAS, ACT and the NT.
# Summary table: confirmed cases on the two most recent dates, the increase
# between them, and fatalities/recoveries/active cases per state plus a
# national Total row.
confirmed %>%
# Keep only the two most recent dates in the series
dplyr::filter(
date >= sort(unique(date), decreasing = TRUE)[2]
) %>%
# Append a national "Total" row for each date
bind_rows(
group_by(., date) %>%
summarise(
confirmed = sum(confirmed)
) %>%
mutate(
`Province/State` = "Total"
)
) %>%
group_by(`Province/State`) %>%
mutate(
# Change between the two dates; the percentage is relative to the smaller
# of the two counts
Increase = c(NA, diff(confirmed)),
`% Increase` = c(NA, diff(confirmed)) / min(confirmed)
) %>%
ungroup() %>%
# One row per state, with a column per measure and date
pivot_wider(
id_cols = `Province/State`,
names_from = date,
values_from = c(confirmed, Increase, `% Increase`)
) %>%
# Drop columns with more than one NA, i.e. the all-NA increase columns for
# the earlier date
dplyr::select_if(function(x){sum(is.na(x)) <= 1}) %>%
rename(State = `Province/State`) %>%
# Tidy the pivoted names: the confirmed columns keep just the date, the
# increase columns lose their date suffix.
# NOTE(review): the suffix pattern is tied to dates in 2020.
rename_all(str_remove_all, pattern = "confirmed_") %>%
rename_all(str_remove_all, pattern = "_2020.+") %>%
# Add today's scraped confirmed and deaths per state
left_join(
dplyr::select(latestAU, State, confirmed, deaths)
) %>%
# The Total row has no match in latestAU; fill it with the sum over states
mutate_at(
vars(confirmed, deaths),
function(x){
x[is.na(x)] <- sum(x, na.rm = TRUE)
x
}
) %>%
# Add today's recovered counts, including a national total
left_join(
auRecTS %>%
dplyr::filter(date == dt) %>%
dplyr::select(State = `Province/State`, recovered) %>%
bind_rows(tibble(State = "Total", recovered = sum(.$recovered)))
) %>%
mutate(
`% Increase` = percent(`% Increase`, accuracy = 0.01),
`Fatality Rate` = percent(deaths / confirmed, accuracy = 0.1),
`Recovery Rate` = percent(recovered / confirmed, accuracy = 0.1),
`Currently Active` = confirmed - recovered - deaths,
) %>%
rename(
Fatalities = deaths,
Recovered = recovered
) %>%
# Final column order for display
dplyr::select(
State, starts_with("20"), ends_with("Increase"), starts_with("Fatal"), starts_with("Recov"), `Currently Active`
) %>%
pander(
justify = "lrrrrrrrrr",
caption = paste(
"*Confirmed cases, fatalities and recoveries reported by each state at time of preparation.",
"Any states with unchanged, or decreasing confirmed cases may indicate delays with the automated data sources, such as health.gov.au or JHU, or that these states have not yet reported for the day.",
"Please note that some discrepancy with dates may occur due to automated data sources obtained different time zones, such as the USA, the UK and Australia.*"
),
# Bold the last row (Total)
emphasize.strong.rows = nrow(.)
)
| State | 2020-07-11 | 2020-07-12 | Increase | % Increase | Fatalities | Fatality Rate | Recovered | Recovery Rate | Currently Active |
|---|---|---|---|---|---|---|---|---|---|
| Australian Capital Territory | 113 | 113 | 0 | 0.00% | 3 | 2.7% | 105 | 92.9% | 5 |
| New South Wales | 3,285 | 3,289 | 4 | 0.12% | 51 | 1.6% | 3,040 | 92.4% | 198 |
| Northern Territory | 32 | 32 | 0 | 0.00% | 0 | 0.0% | 30 | 93.8% | 2 |
| Queensland | 1,070 | 1,070 | 0 | 0.00% | 6 | 0.6% | 1,060 | 99.1% | 4 |
| South Australia | 443 | 443 | 0 | 0.00% | 4 | 0.9% | 439 | 99.1% | 0 |
| Tasmania | 228 | 228 | 0 | 0.00% | 13 | 5.8% | 213 | 94.2% | 0 |
| Victoria | 3,560 | 3,799 | 239 | 6.71% | 24 | 0.6% | 2,289 | 60.3% | 1,486 |
| Western Australia | 634 | 635 | 1 | 0.16% | 9 | 1.4% | 604 | 95.1% | 22 |
| Total | 9,365 | 9,609 | 244 | 2.61% | 110 | 1.1% | 7,780 | 81.0% | 1,717 |
# Caption for the national plot below
ausStatsCap <- "*Current confirmed and recovered cases, along with fatalities for Australia only. Active cases are shown as confirmed cases excluding fatalities and those classed as recovered. Loess curves through all points are shown as continuous lines. Data is only shown from 1^st^ March 2020 as this was the date of the first recorded fatality in Australia. Recovered patient information was also sparse in the early stages of data collection, and as a result estimates of active infections will be a significant underestimate until 6^th^ April. In particular, QLD only began reporting recovered cases on this date. NSW followed a fortnight after this date and as such, only the most recent numbers can be considered as accurate. Below this plot, the same figures can be seen broken down by state.*"
# National stacked bar chart of recovered / active / fatal cases per day, with
# a blue line tracing the stacked total (i.e. confirmed cases).
ggplotly(
  confirmed %>%
    left_join(deaths) %>%
    left_join(recovered) %>%
    # Fill gaps with the last observed value.
    # NOTE(review): this runs over all rows ungrouped, so a leading NA for one
    # state would inherit the preceding state's value - confirm this cannot
    # occur in practice.
    mutate_at(vars(confirmed, deaths, recovered), na_locf) %>%
    # National totals per day, forced to be non-decreasing over time
    group_by(Country, date) %>%
    summarise_at(vars(confirmed, deaths, recovered), sum) %>%
    ungroup() %>%
    mutate_at(vars(confirmed, deaths, recovered), cummax) %>%
    mutate(active = confirmed - deaths - recovered) %>%
    pivot_longer(
      cols = c(active, confirmed, deaths, recovered),
      names_to = "Status",
      values_to = "Total"
    ) %>%
    arrange(Status, date) %>%
    dplyr::filter(date > ymd("2020-02-29")) %>%
    mutate(
      Status = str_to_title(Status),
      Status = str_replace_all(Status, "Deaths", "Fatal"),
      # "Confirmed" is not a level, so it becomes NA here
      Status = factor(Status, levels = c("Recovered", "Active", "Fatal"))
    ) %>%
    dplyr::filter(Total > 0) %>%
    rename_all(str_to_title) %>%
    # Removes the confirmed rows: their Status is NA after the factor()
    # conversion, and filter() drops rows where the condition is NA
    dplyr::filter(Status != "Confirmed") %>%
    ggplot(aes(Date, Total, fill = Status)) +
    geom_col() +
    geom_line(
      # Sum of the stacked categories per day
      data = . %>%
        group_by(Date) %>%
        summarise(
          Total = sum(Total)
        ) %>%
        mutate(Status = "Confirmed"),
      colour = "blue"
    ) +
    scale_fill_manual(
      values = c(
        Active = rgb(0, 0, 0),
        Confirmed = rgb(0, 0.3, 0.7),
        Fatal = rgb(0.8, 0.2, 0.2),
        Recovered = rgb(0.2, 0.7, 0.4)
      )
    ) +
    scale_x_date(expand = expansion(c(0, 0.03))) +
    scale_y_continuous(expand = expansion(c(0, 0.05))) +
    # The label was previously passed unnamed, so it was never applied to
    # any axis
    labs(y = "Total Cases")
)
Current confirmed and recovered cases, along with fatalities for Australia only. Active cases are shown as confirmed cases excluding fatalities and those classed as recovered. Loess curves through all points are shown as continuous lines. Data is only shown from 1st March 2020 as this was the date of the first recorded fatality in Australia. Recovered patient information was also sparse in the early stages of data collection, and as a result estimates of active infections will be a significant underestimate until 6th April. In particular, QLD only began reporting recovered cases on this date. NSW followed a fortnight after this date and as such, only the most recent numbers can be considered as accurate. Below this plot, the same figures can be seen broken down by state.
# Per-state stacked bar chart of recovered / active / fatal cases per million
# residents, one facet per state, with a blue line for the stacked total.
ggplotly(
confirmed %>%
left_join(deaths) %>%
left_join(recovered) %>%
# Fill gaps with the last observed value (applied before grouping by state)
mutate_at(vars(confirmed, deaths, recovered), na_locf) %>%
rename(State = `Province/State`) %>%
dplyr::filter(
date > ymd("2020-03-19"),
) %>%
arrange(date) %>%
# Cumulative counts may never decrease within a state
group_by(State) %>%
mutate_at(
vars(confirmed, recovered, deaths), cummax
) %>%
ungroup() %>%
# Add each state's population for the per-million rate
left_join(ausPops) %>%
mutate(active = confirmed - deaths - recovered) %>%
pivot_longer(
cols = c(confirmed, deaths, recovered, active),
names_to = "status",
values_to = "count"
) %>%
# Drop zero counts, and recovered values before the listed per-state
# cut-off dates
dplyr::filter(
count > 0,
!(State %in% c("Queensland", "New South Wales") & status == "recovered" & date < ymd("2020-04-06")),
!(State %in% c("South Australia") & status == "recovered" & date < ymd("2020-04-01")),
!(State %in% c("Tasmania") & status == "recovered" & date < ymd("2020-04-02")),
) %>%
# Confirmed is drawn as the line over the stacked bars, not as a bar
dplyr::filter(status != "confirmed") %>%
mutate(
# Cases per million residents
rate = 1e6*count/Population,
rate = round(rate, 2),
status = str_replace(status, "deaths", "fatal") %>% str_to_title(),
status = factor(status, levels = c("Recovered", "Active", "Fatal"))
) %>%
rename_all(str_to_title) %>%
ggplot(aes(Date, Rate, fill = Status, label = Count)) +
geom_col() +
geom_line(
# Sum of the stacked categories per state and day
data = . %>%
group_by(State, Date) %>%
summarise(
Rate = sum(Rate),
Count = sum(Count)
) %>%
mutate(Status = "Confirmed"),
colour = "blue"
) +
facet_wrap(~State, ncol = 4) +
scale_fill_manual(
values = c(
Active = rgb(0, 0, 0),
Confirmed = rgb(0, 0.3, 0.7),
Fatal = rgb(0.8, 0.2, 0.2),
Recovered = rgb(0.2, 0.7, 0.4)
)
) +
scale_x_date(expand = expansion(c(0, 0.03))) +
labs(y = "Rate (Cases / Million)")
)
Breakdown of individual states. Victorian recovered numbers began to be accurately reported from 22nd March, with other states gradually providing this information. NSW/QLD recovered cases have only recently begun being reported and up until the most recent dates, recovered/active values were very approximate for these states. The extreme drop for NSW active cases in early June is a function of the changed reporting strategy implemented by NSW Health.
# Daily new cases per state (plus an "All States" panel) plotted against the
# 7-day simple moving average; days above the average are filled red.
ggplotly(
  confirmed %>%
    group_by(`Province/State`) %>%
    # Day-on-day change in the cumulative count, with negative changes
    # floored at zero
    mutate(daily = pmax(c(0, diff(confirmed)), 0)) %>%
    ungroup() %>%
    dplyr::filter(confirmed > 0) %>%
    # National daily totals added as an extra pseudo-state
    bind_rows(
      group_by(., date) %>%
        summarise(daily = sum(daily)) %>%
        mutate(`Province/State` = "All States")
    ) %>%
    group_by(`Province/State`) %>%
    mutate(
      MA = round(sma(daily, 7), 2),
      `Above Average` = daily > MA
    ) %>%
    dplyr::filter(date > "2020-03-01") %>%
    ggplot(aes(date, daily)) +
    geom_col(
      aes(fill = `Above Average`, colour = `Above Average`),
      # Days without a moving average yet are not drawn as bars
      data = . %>% dplyr::filter(!is.na(`Above Average`)),
      width = 1 / 2
    ) +
    geom_line(aes(y = MA), colour = "blue") +
    facet_wrap(~`Province/State`, scales = "free_y") +
    labs(
      x = "Date",
      y = "Daily New Cases",
      fill = "\nAbove\nAverage"
    ) +
    scale_fill_manual(values = c("white", rgb(1, 0.2, 0.2))) +
    scale_colour_manual(values = c("grey50", rgb(1, 0.2, 0.2))),
  tooltip = c("date", "daily", "MA")
)
Daily new cases for each state shown against the 7-day average. Days which are above average are highlighted in red.
# Window (in days) of the simple moving average used for the growth factor
n <- 14
# Figure caption; {n} is interpolated by glue()
cp <- glue(
"*Growth factor for each State/Territory.
This value becomes volatile when daily new cases approach zero as is commonly observed in small populations, and at the end stages of an outbreak.
In order to try and minimise volatility a {n} day simple moving average was used, in contrast to the 5 day average as advocated [here](https://www.abc.net.au/news/2020-04-10/coronavirus-data-australia-growth-factor-covid-19/12132478).
This enables assessment of the growth factor over an entire quarantine period.
If no new cases are observed over this period, the value is not able to be calculated.
The dashed vertical lines indicate the day most state borders were closed.*"
)
# Growth factor plot. R is the ratio of each day's n-day moving average of new
# cases to the previous day's. It is computed per state (first list element)
# and for the national total (second element) using the same steps.
gf <- list(
# --- Per state ---
confirmed %>%
dplyr::filter(Country == "Australia") %>% #, date < dt) %>%
arrange(date) %>%
group_by(
`Province/State`
) %>%
mutate(
# Daily new cases and their n-day moving average
new = c(0, diff(confirmed)),
new_ma = sma(new, n)
) %>%
# Drop days before the first case and days with no moving average yet
dplyr::filter(confirmed > 0, !is.na(new_ma)) %>%
mutate(
# Today's average over yesterday's; 0/0 gives NaN, which is set to NA
R = c(NA, new_ma[-1] / new_ma[-n()]),
R = case_when(
is.nan(R) ~ NA_real_,
!is.nan(R) ~ R
)
) %>%
ungroup() %>%
arrange(`Province/State`),
# --- National total, labelled "All States" ---
confirmed %>%
dplyr::filter(Country == "Australia") %>% #, date < dt) %>%
arrange(date) %>%
group_by(Country, date) %>%
summarise_at(vars(confirmed), sum) %>%
ungroup() %>%
mutate(
new = c(0, diff(confirmed)),
new_ma = sma(new, n)
) %>%
dplyr::filter(confirmed > 0, !is.na(new_ma)) %>%
mutate(
R = c(NA, new_ma[-1] / new_ma[-n()]),
R = case_when(
is.nan(R) ~ NA_real_,
!is.nan(R) ~ R
),
`Province/State` = "All States"
) %>%
arrange(`Province/State`)
) %>%
bind_rows() %>%
dplyr::filter(date > ymd("2020-03-15")) %>%
ggplot(aes(date, R, colour = `Province/State`)) +
# Shade the area between R and the R = 1 reference line
geom_ribbon(aes(ymin = 1, ymax = R), alpha = 0.1) +
geom_hline(yintercept = 1) +
# Dashed line at 2020-03-22 (described in the caption as the day most
# state borders were closed)
geom_vline(
xintercept = ymd("2020-03-22"),
linetype = 2,
colour = "grey30"
) +
# Label the most recent value, placed one day to the right of the last point
geom_label(
aes(label = R),
data = . %>%
dplyr::filter(date == max(date)) %>%
mutate(R = round(R, 2), date = date + 1),
fill = rgb(1, 1, 1, 0.3),
show.legend = FALSE
) +
labs(
x = "Date", y = "Growth Factor"
) +
facet_wrap(~`Province/State`, scales = "free_x") +
theme(legend.position = "none") +
# Zoom the y axis to 0-3 without discarding data outside that range
coord_cartesian(ylim = c(0.0, 3))
gf
Growth factor for each State/Territory. This value becomes volatile when daily new cases approach zero as is commonly observed in small populations, and at the end stages of an outbreak. In order to try and minimise volatility a 14 day simple moving average was used, in contrast to the 5 day average as advocated here. This enables assessment of the growth factor over an entire quarantine period. If no new cases are observed over this period, the value is not able to be calculated. The dashed vertical lines indicate the day most state borders were closed.
The current 14 day growth factor is 1.11 which gives considerable cause for concern. In particular, the outbreak in Victoria is clearly not under control.
# Save today's test counts per state as tested/tested_<YYYYmmdd>.tsv
write_tsv(
  dplyr::select(latestAU, State, Tested = tested),
  glue("tested/tested_{format(dt, '%Y%m%d')}.tsv")
)
# Testing table: tests per thousand residents and the share of positive and
# negative tests for each state, plus a national Total row.
latestAU %>%
dplyr::select(State, Tested = tested) %>%
# Add today's confirmed counts for each state
full_join(
confirmed %>%
dplyr::filter(
date == dt,
Country == "Australia"
) %>%
rename(
State = `Province/State`
)
) %>%
mutate(
# Tests can never be fewer than confirmed cases: use the confirmed count
# when the test figure is missing or smaller
Tested = case_when(
is.na(Tested) ~ confirmed,
Tested < confirmed ~ confirmed,
!is.na(Tested) ~ Tested
)
) %>%
left_join(ausPops) %>%
# Append the national totals
bind_rows(
tibble(
State = "Total",
Population = sum(.$Population, na.rm = TRUE),
confirmed = sum(.$confirmed, na.rm = TRUE),
Tested = sum(.$Tested, na.rm = TRUE)
)
) %>%
mutate(
`Tests / '000` = round(1e3 * Tested / Population, 2),
# Confirmed cases are taken as the positive tests
Positive = confirmed / Tested,
Negative = 1 - Positive,
# Helper flag used only to sort the Total row last
isTotal = grepl("Total", State)
) %>%
dplyr::select(
State, Population,
Confirmed = confirmed,
Tested,
contains("000"),
ends_with("ive"),
isTotal
) %>%
# States ordered by testing rate (highest first), Total at the bottom
arrange(isTotal, desc(`Tests / '000`)) %>%
dplyr::select(-isTotal) %>%
dplyr::rename(
`% Positive Tests` = Positive,
`% Negative Tests` = Negative
) %>%
# Format the two proportion columns as percentages
mutate_at(
vars(starts_with("%")), percent, accuracy = 0.01
) %>%
pander(
justify = "lrrrrrr",
missing = "",
caption = glue(
"*COVID-19 testing scaled by state population size.
Confirmed cases are assumed to be the tests returning a positive result.
The current numbers available for some states are a lower limit, and as such, the proportion of the population tested is likely to be higher, as is the proportion of tests returning a negative result.*"
),
# Bold the last row (Total)
emphasize.strong.rows = nrow(.)
)
| State | Population | Confirmed | Tested | Tests / ’000 | % Positive Tests | % Negative Tests |
|---|---|---|---|---|---|---|
| Victoria | 6,629,870 | 3,799 | 1,125,414 | 169.8 | 0.34% | 99.66% |
| New South Wales | 8,117,976 | 3,289 | 1,039,385 | 128 | 0.32% | 99.68% |
| Tasmania | 535,500 | 228 | 55,850 | 104.3 | 0.41% | 99.59% |
| South Australia | 1,756,494 | 443 | 173,296 | 98.66 | 0.26% | 99.74% |
| Australian Capital Territory | 428,060 | 113 | 35,872 | 83.8 | 0.32% | 99.68% |
| Queensland | 5,115,451 | 1,070 | 421,738 | 82.44 | 0.25% | 99.75% |
| Western Australia | 2,630,557 | 635 | 204,929 | 77.9 | 0.31% | 99.69% |
| Northern Territory | 245,562 | 32 | 15,784 | 64.28 | 0.20% | 99.80% |
| Total | 25,459,470 | 9,609 | 3,072,268 | 120.7 | 0.31% | 99.69% |
R version 4.0.2 (2020-06-22)
Platform: x86_64-pc-linux-gnu (64-bit)
locale: LC_CTYPE=C, LC_NUMERIC=C, LC_TIME=C, LC_COLLATE=C, LC_MONETARY=C, LC_MESSAGES=en_AU.UTF-8, LC_PAPER=en_AU.UTF-8, LC_NAME=C, LC_ADDRESS=C, LC_TELEPHONE=C, LC_MEASUREMENT=en_AU.UTF-8 and LC_IDENTIFICATION=C
attached base packages: stats, graphics, grDevices, utils, datasets, methods and base
other attached packages: readxl(v.1.3.1), ggfortify(v.0.4.10), QuantTools(v.0.5.7), data.table(v.1.12.8), cowplot(v.1.0.0), plotly(v.4.9.2.1), pander(v.0.6.3), RCurl(v.1.98-1.2), rvest(v.0.3.5), xml2(v.1.3.2), jsonlite(v.1.7.0), glue(v.1.4.1), broom(v.0.7.0), ggrepel(v.0.8.2), matrixStats(v.0.56.0), scales(v.1.1.1), lubridate(v.1.7.9), magrittr(v.1.5), forcats(v.0.5.0), stringr(v.1.4.0), dplyr(v.1.0.0), purrr(v.0.3.4), readr(v.1.3.1), tidyr(v.1.1.0), tibble(v.3.0.2), ggplot2(v.3.3.2) and tidyverse(v.1.3.0)
loaded via a namespace (and not attached): httr(v.1.4.1), viridisLite(v.0.3.0), here(v.0.1), modelr(v.0.1.8), fasttime(v.1.0-2), assertthat(v.0.2.1), highr(v.0.8), selectr(v.0.4-2), blob(v.1.2.1), cellranger(v.1.1.0), yaml(v.2.2.1), pillar(v.1.4.5), backports(v.1.1.8), digest(v.0.6.25), colorspace(v.1.4-1), htmltools(v.0.5.0), pkgconfig(v.2.0.3), haven(v.2.3.1), generics(v.0.0.2), farver(v.2.0.3), ellipsis(v.0.3.1), withr(v.2.2.0), lazyeval(v.0.2.2), cli(v.2.0.2), crayon(v.1.3.4), evaluate(v.0.14), fs(v.1.4.2), fansi(v.0.4.1), Cairo(v.1.5-12.2), tools(v.4.0.2), hms(v.0.5.3), lifecycle(v.0.2.0), munsell(v.0.5.0), reprex(v.0.3.0), compiler(v.4.0.2), rlang(v.0.4.7), grid(v.4.0.2), rstudioapi(v.0.11), htmlwidgets(v.1.5.1), crosstalk(v.1.1.0.1), bitops(v.1.0-6), labeling(v.0.3), rmarkdown(v.2.3), gtable(v.0.3.0), DBI(v.1.1.0), curl(v.4.3), R6(v.2.4.1), gridExtra(v.2.3), knitr(v.1.29), rprojroot(v.1.3-2), stringi(v.1.4.6), Rcpp(v.1.0.5), vctrs(v.0.3.1), dbplyr(v.1.4.4), tidyselect(v.1.1.0) and xfun(v.0.15)